########################################################################
##
##  Purpose: This script produces Figure 3 from the manuscript
##  Author: james.h.bisbee@vanderbilt.edu
##  Date: 05/18/2024
##
##  Input Files:
##    - ./data/for_replication.rds
##  Output Files:
##    - ./output/Figures/MUSK_fig3.pdf
##
########################################################################
# Start from a clean workspace so objects left over from other replication
# scripts cannot leak into this one.
# NOTE(review): when this file is source()d from another script this also
# wipes the caller's global environment -- prefer running it in a fresh R
# session.
rm(list = ls())
gc()

# library() stops immediately when a package is missing; require() would only
# return FALSE with a warning and let the script fail further down with a much
# less obvious "could not find function" error.
library(changepoint)
library(tidyverse)
library(ggridges)


# Compute details
Sys.info()
# sysname        release        version       nodename 
# "Windows"       "10 x64"  "build 22631"   "AS-GR3RGY3" 
# machine          login           user effective_user 
# "x86-64"     "bisbeejh"     "bisbeejh"     "bisbeejh"

# Run a `wmic` hardware query and return its raw output lines.
# `wmic` is a Windows-only tool (and may be absent even there), and
# system(..., intern = TRUE) raises an error when the command cannot be run.
# The hardware report is purely informational, so on any other platform, or on
# failure, return NA instead of aborting the whole replication script.
queryWmic <- function(args) {
  if (.Platform$OS.type != "windows") {
    return(NA_character_)
  }
  tryCatch(
    system(paste("wmic", args), intern = TRUE),
    error = function(e) NA_character_
  )
}

# The first output line is the column header, hence [-1] / [2].
ram_size <- queryWmic("MemoryChip get Capacity")[-1]
model_name <- queryWmic("cpu get name")[2] # nocov
vendor_id <- queryWmic("cpu get manufacturer")[2] # nocov

# Entries are NA when the corresponding query was unavailable.
print(list(ram = stringr::str_squish(ram_size)[1],
           vendor_id = stringr::str_squish(vendor_id),
           model_name = stringr::str_squish(model_name),
           no_of_cores = parallel::detectCores()))

# $ram
# [1] "17179869184"
# 
# $vendor_id
# [1] "GenuineIntel"
# 
# $model_name
# [1] "13th Gen Intel(R) Core(TM) i9-13900H"
# 
# $no_of_cores
# [1] 20

# Package details: report the R version and the attached / loaded package
# versions of the current session. The output captured on the author's machine
# is kept in the comment block that follows.
sessionInfo()

# attached base packages:
# [1] stats     graphics  grDevices utils     datasets 
# [6] methods   base     
# 
# other attached packages:
#  [1] ggridges_0.5.4    lubridate_1.9.2   forcats_1.0.0    
#  [4] stringr_1.5.0     dplyr_1.1.2       purrr_1.0.1      
#  [7] readr_2.1.4       tidyr_1.3.0       tibble_3.2.1     
# [10] ggplot2_3.4.4     tidyverse_2.0.0   changepoint_2.2.4
# [13] zoo_1.8-12       
# 
# loaded via a namespace (and not attached):
#  [1] vctrs_0.6.3       cli_3.6.1         rlang_1.1.1      
#  [4] stringi_1.7.12    generics_0.1.3    glue_1.6.2       
#  [7] colorspace_2.1-0  hms_1.1.3         scales_1.3.0     
# [10] fansi_1.0.4       grid_4.3.3        munsell_0.5.0    
# [13] tzdb_0.4.0        lifecycle_1.0.3   compiler_4.3.3   
# [16] timechange_0.2.0  pkgconfig_2.0.3   rstudioapi_0.15.0
# [19] lattice_0.22-5    R6_2.5.1          tidyselect_1.2.0 
# [22] utf8_1.2.3        pillar_1.9.0      magrittr_2.0.3   
# [25] tools_4.3.3       withr_2.5.0       gtable_0.3.3

# Load the replication data. The code below uses its columns created_at,
# nTweets, sn and type.
finalFullUsers <- read_rds('./data/for_replication.rds')

# Restrict to the window used for the changepoint analysis (inclusive bounds).
finalFullUsersSmaller <- finalFullUsers %>%
  filter(created_at >= as.Date('2022-01-01'),
         created_at <= as.Date('2023-06-01'))

# Daily aggregates over the full data in long format (one row per date and
# metric): mean tweets, total tweets and number of distinct accounts.
# Rows are ordered by date, then metric.
toplot <- finalFullUsers %>%
  group_by(created_at) %>%
  summarise(mTweets = mean(nTweets, na.rm = TRUE),
            nTweets = sum(nTweets, na.rm = TRUE),
            nAccounts = n_distinct(sn)) %>%
  ungroup() %>%
  pivot_longer(-created_at, names_to = 'metric', values_to = 'value') %>%
  mutate(weekday = lubridate::wday(created_at, label = TRUE),
         # Weekend = Saturday/Sunday (the previous code flagged Sunday/Monday).
         # With week_start = 1 Monday is 1, so Saturday = 6 and Sunday = 7;
         # the numeric test does not depend on locale-specific day labels.
         weekend = ifelse(lubridate::wday(created_at, week_start = 1) >= 6,
                          'Weekend', 'Weekday'))

# BCP confirmation
#
# For every metric x tweet type, locate the first changepoint in the mean of
# the daily series (changepoint::cpt.mean with its default settings): once on
# all accounts ('Full') and once for each of nBoot random subsamples of
# accounts ('bs_1', 'bs_2', ...). The result, bcpRes, has one row per
# metric x type x sample with columns metric, type, date and sample.

# Fix the RNG so the random subsamples -- and therefore the figure -- are the
# same on every run.
set.seed(20240518)

nBoot <- 100      # random account subsamples per metric x type
bootSize <- 5000  # accounts drawn per subsample

# Daily series of one metric for the rows in `df`.
#   met: 'nTweets' (total tweets per day) or 'nAccounts' (distinct accounts
#        tweeting per day).
# Returns a tibble (created_at, value) ordered by created_at, because
# group_by() + summarise() sorts by the grouping key. `value` is coerced to
# double, matching the type the series had after the former gather() step.
dailyMetric <- function(df, met) {
  df %>%
    group_by(created_at) %>%
    summarise(nTweets = sum(nTweets, na.rm = TRUE),
              nAccounts = length(unique(sn))) %>%
    ungroup() %>%
    transmute(created_at, value = as.numeric(.data[[met]]))
}

# Date at the first changepoint location reported for a daily series.
# NOTE(review): per the changepoint documentation the last element of @cpts is
# the series length, so when no change is detected this presumably returns the
# final date of the series -- confirm that this is the intended encoding.
firstChangepoint <- function(series) {
  cptFit <- changepoint::cpt.mean(series$value)
  series$created_at[cptFit@cpts[1]]
}

# Loop-invariant: the pool of accounts that subsamples are drawn from.
allAccounts <- unique(finalFullUsersSmaller$sn)

# Collect one-row tibbles in a list and bind once at the end instead of
# growing a data frame inside the loop.
bcpList <- list()
for (met in c('nTweets', 'nAccounts')) {
  for (tweetType in unique(finalFullUsersSmaller$type)) {
    if (is.na(tweetType)) { next }
    cat(met, tweetType, '\n')

    # Rows of this tweet type; reused by the full fit and every subsample.
    typeData <- finalFullUsersSmaller %>%
      filter(type == tweetType)

    bcpList[[length(bcpList) + 1]] <- tibble(
      metric = met,
      type = tweetType,
      date = firstChangepoint(dailyMetric(typeData, met)),
      sample = 'Full'
    )

    for (i in seq_len(nBoot)) {
      # Accounts are drawn with replacement from the whole pool (all types),
      # but membership is tested with %in%, so every drawn account enters the
      # subsample once: it holds at most bootSize distinct accounts.
      samp <- sample(allAccounts, size = bootSize, replace = TRUE)
      bootSeries <- typeData %>%
        filter(sn %in% samp) %>%
        dailyMetric(met)

      bcpList[[length(bcpList) + 1]] <- tibble(
        metric = met,
        type = tweetType,
        date = firstChangepoint(bootSeries),
        sample = paste0('bs_', i)
      )
    }
  }
}
bcpRes <- bind_rows(bcpList)

# Figure 3: distribution of the changepoint dates estimated on the random
# subsamples (the 'Full' fit is excluded), faceted by tweet type (rows) and
# metric (columns). The dashed red line marks 2022-10-28.
fig3 <- bcpRes %>%
  filter(sample != 'Full') %>%
  mutate(metric = ifelse(metric == 'nAccounts','# of accounts tweeting','# of tweets posted')) %>%
  ggplot(aes(x = date)) + 
  # `linewidth` replaces `size` for bar outlines (deprecated in ggplot2 3.4.0).
  geom_bar(color = 'grey30',linewidth = 1.2) + 
  facet_grid(type~metric,scales = 'free') + 
  geom_vline(xintercept = as.Date('2022-10-28'),
             linetype = 'dashed',color = 'red') + 
  theme_bw() + 
  labs(x = 'Date',y = '% of 100 Random Samples',
       title = 'Bootstrapped BCP Estimate',
       subtitle = '100 random samples for two measures (columns) and four types of tweets (rows)')

# Make sure the target directory exists before opening the device.
dir.create('./output/Figures',recursive = TRUE,showWarnings = FALSE)

pdf('./output/Figures/MUSK_fig3.pdf',width = 7,height = 5)
# A ggplot object is only drawn when printed. Top-level auto-printing does not
# happen under source(), which would leave an empty PDF, so print explicitly.
# `finally` closes the device even if rendering fails.
tryCatch(print(fig3),
         finally = dev.off())

# EOF